TEXSYN.SAI[TEX,DEK]10 - www.SailDart.org

perm filename TEXSYN.SAI[TEX,DEK]10 blob sn#436993 filedate 1979-04-27 generic text, type C, neo UTF8
COMMENT ⊗   VALID 00017 PAGES
C REC  PAGE   DESCRIPTION
C00001 00001
C00004 00002	entry begin comment The syntax module of TEX.
C00008 00003	A list of the command codes
C00016 00004	The hash table: hash,eqtb,idlen,idlev,idtyp
C00023 00005	Saving and restoring eqtb values: eqdefine,newsavelevel,eqdestroy,unsave
C00032 00006	Hash table algorithms: idlookup,controlseq,idname,hashentry,eqlink
C00040 00007	The input stacks: inbuf,curbuf,state,loc,recovery,filename,parstack
C00048 00008	Tokens, token lists, and the diagnostic routines dumplist,dumptokens
C00055 00009	Maintaining the input stacks: pushinput,popinput,initin,dumpcontext,inslist
C00062 00010	The basic input procedure getnext and its cousins gettok,getncnext,getnctok
C00075 00011	Defining user control sequences and output routines: macrodef,scantoks
C00084 00012	Calling user macros: macrocall
C00091 00013	Accessing user's files: scanfilename, inputfile, definefont
C00101 00014	Basic scanning routines: backinput,scandigit,scanlb,scanstring,scannumber
C00107 00015	Further scanning routines: scanlength,scanposlength,scanglue,scanspec
C00113 00016	Additional scanning routines: scanfont,scandelim,scanrulespec
C00117 00017	Still more scanning routines: passblock,insnum,scancond
C00121 ENDMK
C⊗;
entry; begin comment The syntax module of TEX.

(It is wise to read the memory allocation sections of TEXSYS
before delving very deeply into the following code.)

The purpose of these routines is to deliver the user's input to
the semantics module of TEX, one token at a time. This module
also contains utility subroutines for syntactic operations such as
the scanning of glue specifications. The save-and-restore mechanism,
which maintains the current meanings of control sequences, appears here too.

Each call of the procedure "getnext" sets the value of two variables
"curcmd" and "curchar", representing the next input token.
	curcmd denotes a command code,
	curchar denotes a character code or other modifier of the command code.
The semantics module acts as an interpretive routine responding to these commands.

Underneath this external behavior of "getnext" is all the machinery necessary
to convert from character files to tokens. At a given time we may be partially
finished reading some files (when \input was sensed), partially finished
expanding some user-defined macro and perhaps one of its parameters, partially
finished generating some of the standard code in an \halign, and so on.
When reading a character file, comments and redundant blank spaces must be
removed, paragraphs must be recognized.  Furthermore there are occasions
in which the scanner has looked ahead for a word like "plus" but has found
only part of that word, hence a few characters must be fed back and scanned
again. To handle all these situations, there are various stacks that
hold information about the incomplete activities, and a finite state control
for each level of the input control. These stacks record the current state
of an implicitly recursive process, but the procedures themselves are
nonrecursive. This has been done so that low-level implementations of the
same algorithms are easy to create and because getnext acts as a coroutine of
the semantic actions;

require "TEXHDR.SAI" source_file;
internaldef closegroupchar = ifc TENEX thenc "}" elsec "}" endc;

internal integer curcmd # the current command code appearing in the input;
internal integer curchar # the current character code appearing in the input;
comment A list of the command codes;

comment The following definitions attach numeric codes to the various
"commands" interpreted by TEX. The symbolic names of these codes are
used elsewhere. Sometimes the ordering of the codes is important
(e.g. we might branch on cmd > font), so the codes are not completely
arbitrary;

internaldef escape=0	# escape delimiter (\ in TEX manual);
internaldef lbrace=1	# begin block symbol ( { );
internaldef rbrace=2	# end block symbol ( } );
internaldef mathbr=3    # math break ( $ );
internaldef tabmrk=4	# tab mark ( ⊗ );
internaldef carret=5	# carriage return and comment mark ( % );
comment carret is also used as the command code for \cr;
internaldef macprm=6	# macro parameter ( # );
internaldef supmrk=7	# superscript ( ↑ );
internaldef submrk=8	# subscript ( ↓ );
internaldef ignore=9	# chars to ignore;
internaldef spacer=10	# chars treated as blank space;
internaldef letter=11	# chars treated as letters;
internaldef otherchar=12 # none of the above character types;
internaldef parend=13	# end of paragraph;

internaldef match=14	# macro parameter matching;
internaldef outpar=ignore # output a macro parameter;
internaldef endv=15	# end of vlist in halign or valign template;
internaldef call=16	# call a user-defined macro;
internaldef xt=17 	# extensions to basic TEX (\x);

internaldef assignglue=18 # user-defined glue;
internaldef font=19	# user-defined current font;

comment Codes "assignglue" thru "font", inclusive, are not redefinable by \def,
since their eqtb entries are used by TEX semantics. Codes "call" thru
"assignglue", inclusive, have eqtb entries whose link fields may point into mem,
so they should not be changed without deallocation;

internaldef assignreal=20 # user-defined length;
internaldef def=21	# macro definition (\def,\gdef);
internaldef output=22	# output routine definition (\output);
internaldef innput=23	# required input file (\input);
internaldef setpar=24	# set TEX control parameter (\trace,\jpar);
internaldef stop=25	# end of input (\end);
internaldef ddt=26	# emergency debugging (\ddt);
internaldef ascii=27	# code for possibly untypeable character (\char);
internaldef chcode=28	# change chartype table (\chcode);
internaldef fntfam=29	# declare font family (\mathrm,etc.);
internaldef setcount=30 # set current page number (\setcount);
internaldef advcount=31 # increase current page number (\advcount);
internaldef count=32	# insert current page number (\count);
internaldef ifeven=33	# conditional on count even (\ifeven);
internaldef ifT=34	# conditional on character T (\ifT);
internaldef elsecode=35	# delimiter for conditionals (\else);
internaldef box=36	# saved box (\box,\page) or justification(\hbox,\vbox);
internaldef hmove=37	# horizontal motion of box (\moveleft,\moveright);
internaldef vmove=38	# vertical motion of box (\raise,\lower);
internaldef save=39	# save a box (\save);
internaldef leaders=40	# define leaders (\leaders);
internaldef halign=41	# horizontal table alignment (\halign);
internaldef valign=42	# vertical table alignment (\valign);
internaldef noalign=43	# insertion into halign or valign (\noalign);
internaldef vskip=44	# vertical glue (\vskip,\vfill);
internaldef hskip=45	# horizontal glue (\hskip,\hfill);
internaldef vrule=46	# vertical rule (\vrule);
internaldef hrule=47	# horizontal rule (\hrule);
internaldef topbotins=48 # inserted vlist (\topinsert or \botinsert);
internaldef topbotmark=49 # insert mark (\topmark,\botmark);
internaldef mark=50	# define a mark (\mark);
internaldef penalty=51	# specify badness of break (\penalty);
internaldef noindent=52	# begin nonindented paragraph (\noindent);
internaldef eject=53	# eject page here (\eject);
internaldef discr=54	# discretionary hyphen (\-,\*);
internaldef accent=55	# attach accent to character (\+);
internaldef newaccent=56 # define nonstandard accent (\accent);
internaldef eqno=57	# insert equation number (\eqno);
internaldef mathonly=58	# character or token allowed in mathmode only;
internaldef exspace=59	# explicit space (\ );
internaldef nonmathletter=60 # letter except in mmode;
internaldef leftright=61 # variable delimiter (\left, \right);
internaldef caseshift=62 # force specified case (\uppercase, \lowercase);
internaldef mathinput=63 # component of math formula (\mathop,\mathbin, etc.);
internaldef limsw=64	# modify limit conventions (\limitswitch);
internaldef above=65	# numerator-denominator separator(\above,\atop,\over,\comb);
internaldef mathstyle=66 # style or space specification (\dispstyle,\,,etc.);
internaldef italcorr=67 # italic correction (\/);
internaldef vcenter=68  # vbox centered on axis (\vcenter);
internaldef hangindent=69 # specifies hanging indentation (\hangindent);
internaldef unskip=70 # nullifies glue (\unskip);

internaldef maxopcode=unskip # the largest code number;
internaldef charcodes=otherchar+1 # number of distinct codes allowed in chartype;
internaldef texpars=10 # number of distinct parameters settable by setpar command;
comment The hash table: hash,eqtb,idlen,idlev,idtyp;

comment Control sequences, some of which are predeclared, are recorded in a
hash table, with an associated table of their equivalent meanings. Linear
probing (e.g., Algorithm 6.4L in ACP) is used to access this table, which is
in three parts: The last 128 words are for the command codes associated with
characters read from external files (these are changed by \chcode).
The next-to-last 128 words are for single-character control sequences
that are addressed directly. The first words are for packed representations
of longer control seqences, using six bits for the first letter (in order to
distinguish upper and lower case) and five bits for each remaining letter,
left justified in the word.

Entries in the equivalents table contain several fields:
	idlen	(length-1) mod 8 of the name
	idlev	level of {...} nesting at which this equivalent was defined
	idcmd	command code for the name
	link	pointer into mem or modifier of idcmd
The value of idlev is nonzero whenever the equivalent is defined: level 1
stands for initial default values and user definitions not in braces.
(Exception: idlev=0 in the last 128 words, as these words are treated specially.)
The value of idcmd is used to determine, among other things, what to do when
the equivalent value changes -- for example, if link points to a node 
representing glue, we probably want to call procedure delgluelink when
this field changes;

internaldef hashsize = 353 # hashtable size, should be prime and < 2↑chars-127;
internal saf integer array hash[0:hashsize-1] # hash table for packed names;
internaldef eqtbsize=hashsize+128+256+15+texpars # size of table for current values;
internal saf integer array eqtb[0:eqtbsize-1] # equivalents of symbols & parameters;
internaldef chartype(c) = ⊂eqtb[c+(hashsize+128)]⊃ # cmds associated with chars;
internaldef mmodecode(c) = ⊂eqtb[c+(hashsize+256)]⊃ # codes for mathmode equivalents;
comment eqtb[hashsize+384:hashsize+395] is for the "mathfonttable", see TEXSEM p.7;
internaldef innerhangbegin = ⊂eqtb[hashsize+396]⊃ # hanging indent for \hbox par ...;
internaldef innerhangfirst = ⊂eqtb[hashsize+397]⊃ # see TEXSEM p.18,19;
internaldef innerhangwidth = ⊂memory[location(eqtb[hashsize+398]),real]⊃;
internaldef tracing = ⊂eqtb[hashsize+399]⊃ # controls diagnostics, see TEXSEM p.4;
internaldef jpar = ⊂eqtb[hashsize+400]⊃ # controls justification, see TEXSEM p.13;
internaldef hpen = ⊂eqtb[hashsize+401]⊃ # hyphenation penalty, see TEXSEM p.13;
internaldef penpen = ⊂eqtb[hashsize+402]⊃ # penultimate penalty, see TEXSEM p.13;
internaldef wpen = ⊂eqtb[hashsize+403]⊃ # widow-line penalty, see TEXSEM p.13;
internaldef bpen = ⊂eqtb[hashsize+404]⊃ # broken-line penalty, see TEXSEM p.13;
internaldef mbpen = ⊂eqtb[hashsize+405]⊃ # binary-op-break penalty, see TEXSEM p.15;
internaldef mrpen = ⊂eqtb[hashsize+406]⊃ # relation-break penalty, see TEXSEM p.15;
internaldef ragged = ⊂eqtb[hashsize+407]⊃ # raggedness, see TEXSEM p.13;
internaldef disppen = ⊂eqtb[hashsize+408]⊃ # penalty before a display, see TEXSEM p.18;

internaldef idlens=3,idlend=links 			# idlen field in eqtb;
internaldef idlevs=5,idlevd=idlens+idlend		# idlev field in eqtb;
internaldef idcmdd=idlevs+idlevd,idcmds=bitsperwd-idcmdd # idcmd field in eqtb;
comment Saving and restoring eqtb values: eqdefine,newsavelevel,eqdestroy,unsave;

comment The nested structure provided by { and } blocks in TEX means that
eqtb entries of outer blocks should be saved and restored. Furthermore,
it is often necessary to free up some memory when an eqtb entry is changed.

The procedure eqdefine is used to set a new eqtb entry. If a previous value
was defined at the same nesting level, it is destroyed (using procedure
"eqdestroy", which frees memory if appropriate), and the new value is inserted.
If a previous value was defined at an outer nesting level (indicated by its
idlev field), the old value is placed on savestack and the new value is
inserted. At the end of a nesting level, i.e., when the } is sensed, the
savestack is used to restore the outer values and the inner ones are destroyed.

Entries on savestack are of three main forms:
	"-c" where c is an ending-routine code
denotes the first entry on a given nesting level, placed on savestack when
{ is sensed. These codes are defined in TEXSEM (cf. the processing of
rbrace in main_control), they indicate what action to perform when the }
comes along. Furthermore, some routines such as hbox and halign place
another word or three onto savestack, immediately below the "-c", denoting
parameters that tell the desired final size and disposition of the box. These
parameters are removed at the time the -c is removed, so the save and restore
routines of concern to us here do not have to know about such extra words.
	value,index 	two words, the top word being ≥0
means that when } is sensed eqtb[index] should be reset to value.
	(1,index)	one word, the index in the link field
means that when } is sensed eqtb[index] and hash[index] should be reset to zero.

Procedure newsavelevel is called when a { is sensed, and restore is called
when a } is sensed;

internal integer curlev # the current level of nesting, times 2↑idlevd;
internaldef savesize = 140 # size of savestack;
internal integer saveptr # first unused entry on savestack;
internal saf integer array savestack[0:savesize+2] # place for dormant eqtb entries;
comment By saying "+2" instead of "-1" on the previous line, we make it possible
to avoid testing for saveptr overflow, up to thrice in a row (excuse the trick);

internaldef level1 = 1 lsh idlevd;

internal procedure initsave # initialize the save-restore mechanism;
begin curlev ← level1;
saveptr←0;
end;

simp procedure eqdestroy(integer eqtbval);
begin comment Frees memory, if necessary, when the given value from eqtb is
to be forgotten;
integer p,c;
p←field(link,eqtbval);
c←field(idcmd,eqtbval);
case c of begin
[call] delrclink(p) # p points to reference count
		of token list for user-defined macro;
[xt] eqdestroyext(p) # possible extension to TEX;
[assignglue] delgluelink(p) # p points to glue node;
else comment do nothing if redefining other control sequences;
  end;
end;

internal simp procedure eqdefine(integer index,cmd,lnk) # change eqtb entry;
begin comment This procedure defines an eqtb entry having specified idcmd
and link fields, and saves the former value if appropriate;
integer t,l;
l←ufield(idlev,t←eqtb[index]);
if l=curlev then
	eqdestroy(t) comment redefinition on same level;
else if curlev>level1 then
	begin comment save definition on old level;
	if saveptr≥savesize-1 then overflow(savesize);
	savestack[saveptr]←t; savestack[saveptr+1]←index;
	saveptr←saveptr+2 # store two words on savestack;
	end;
eqtb[index]←ufield(idlen,t) + (cmd lsh idcmdd) + curlev + lnk;
end;

internal procedure chcodedef(integer index,valu) # eqdefine for char codes;
begin comment This procedure is called with 0≤index<128 by a \chcode operation,
or with index≥128 by a parameter setting operation like \trace or \mathrm;
if curlev > level1 then
	begin if saveptr≥savesize-1 then overflow(savesize);
	savestack[saveptr]←eqtb[index+(hashsize+128)];
	savestack[saveptr+1]←index+(hashsize+128);
	saveptr←saveptr+2 # store two words on savestack;
	end;
if index<0 or index≥eqtbsize-(hashsize+128) or (index<128 and valu≥charcodes) then
	error("Improper code")
else eqtb[index+(hashsize+128)]←valu;
end;

internal integer procedure unsave # clears off top nesting level of savestack
	and returns the ending-routine code;
begin integer t;
curlev ← curlev - level1;
if curlev then
while true do
	begin saveptr←saveptr-1; t←savestack[saveptr] # get top entry;
	if t<0 then return(-t);
	if t≥refct1 then
		begin comment delete control sequence from hash table;
		t←t-refct1; eqdestroy(eqtb[t]);
		eqtb[t] ← 0;
		if t < hashsize then hash[t]←0;
		end
	else	begin comment restore old eqtb entry;
		eqdestroy(eqtb[t]) # after properly disposing of the present one;
		saveptr←saveptr-1;
		eqtb[t]←savestack[saveptr];
		end;
	end
else	begin comment curlev mustn't become zero, preserve definitions at level 1;
	curlev←level1; return(bottomlevel); 
	end;
end;

internal simp procedure newsavelevel(integer endcode) # starts new nesting level;
begin comment The specified ending-routine code is stored on savestack,
initiating a new level of nesting;
if saveptr ≥ savesize then overflow(savesize);
savestack[saveptr] ← -endcode;
saveptr ← saveptr+1;
if(curlev←curlev+level1) ≥ 1 lsh(idlevd+idlevs) then overflow(idlevs);
end;

comment Here is a list of the ending-routine codes used;
internaldef bottomlevel=1,simpleblock=2,trueend=3,aligncode=4,mathcode=5,
outputend=6,noalignend=7,botinsend=8,topinsend=botinsend+1,justend=10,
mathblock=11,mathleft=12,endvcenter=13,endscanmath=14,falseend=15;
comment Hash table algorithms: idlookup,controlseq,idname,hashentry,eqlink;

internal integer hashentry # the most recent hash table location;
boolean nonewcontrolseq # do not define undefined control sequences;

internal procedure idlookup(integer id,len) # searches the hashtable;
begin comment The packed name "id" whose idlen ufield is "len",
or (alternatively) the single-character code "id", is looked up in
the hash table. If not found, it is entered, and the savestack is adjusted
so that the entry will be cleared at the close of the current nesting
level. Upon exit, the appropriate index for this symbol in eqtb will appear
in the global variable "hashentry";
boolean thruonce;
if id≥0 and id<'200 then
	begin comment single character id;
	hashentry ← id+hashsize;
	if eqtb[hashentry]≠0 then return;
	end
else	begin comment multicharacter id; integer t;
	thruonce←false;
	hashentry ← (abs(id)+len) mod hashsize;
	while (t←hash[hashentry])≠0 and(t≠id or ufield(idlen,eqtb[hashentry])≠len)do
		begin hashentry←hashentry-1 # move to next position;
		if hashentry<0 then
			begin if thruonce then overflow(hashsize);
			thruonce←true; hashentry←hashentry+hashsize # cyclically;
			end;
		end;
	if t≠0 then return;
	hash[hashentry]←id;
	end;
comment new control sequence encountered;
if nonewcontrolseq then
	begin if hashentry<hashsize then hash[hashentry]←0;
	hashentry←hashsize+128 # beware trick: this refers to the first
		entry of the charcode table, so it will look like a control
		sequence with cmd=0;
	end
else	begin eqtb[hashentry]←len+curlev;
	if curlev>level1 then
		begin if saveptr≥savesize then overflow(savesize);
		savestack[saveptr]←refct1+hashentry # special savestack entry;
		saveptr←saveptr+1;
		end;
	end;
end;

internal simp procedure controlseq # gets a packed name from the input;
begin comment This procedure removes a control sequence from the string variable
curbuf, assuming that the initial escape character \ has already been removed.
Then this control sequence is found in the hashtable, and hashentry is set;
integer id,len,d;
id←lop(curbuf) # remove first character;
if curbuf=0 then curbuf←'15 # don't remove the '15 at end of curbuf;
comment Note that in, e.g., \% the % should not be treated as a comment delimiter;
len←0 # len represents (length-1) lsh idlend;
if chartype(id)=letter and chartype(curbuf)=letter then
	begin comment two or more letters in the control sequence;
	d←bitsperwd-6; id←id lsh d # pack first character;
	do begin id←id+((lop(curbuf)land '37)lsh(d←d-5)) #
		if d≤-5 this shifts the character out of sight;
		len←len+(1 lsh idlend);
		end until chartype(curbuf)≠letter;
	end;
idlookup(id,len land('7 lsh idlend));
end;

internal string procedure idname(integer h) # the name associated with eqtb[h];
comment This is sort of an inverse to the controlseq procedure;
if h≥hashsize then
	begin integer c; c←h-hashsize;
	if c then return('177&c) else return("NULL");
	comment '177&c will print something appropriate when c is a control
		symbol, otherwise it prints just the character c;
	end
else if hash[h]=0 then return("UNDEFINED")
else	begin integer t; string s;
	define lettersperwd = ((bitsperwd-6)div 5)+1 # number of complete letters;
	define leastsiglet = 2↑(bitsperwd-5*lettersperwd+4)-1 # mask for rthand ltr;
	t←(hash[h] lsh(5*lettersperwd+1-bitsperwd))rot(bitsperwd-5*lettersperwd+5);
	s←(t land '77)lor '100; t←t land (-'100) # remove first letter;
	while t land ('37 rot -5) do
		begin t←t rot 5; s←s&((t land '37)lor '140);
		end;
	if hash[h] land leastsiglet then
		begin t←field(idlen,eqtb[h]);
		while t≠(lettersperwd-1) do
			begin t←(t-1) land '7; s←s&"x" # add x's for correct length;
			end;
		end;
	return(s);
	end;

comment The following global variables are set to positions in the eqtb,
for reference by the semantic routines;
internaldef locsize=9 # size of locs array for storing eqtb locations;
internal saf integer array locs[0:locsize-1];
internaldef lineskiploc=⊂locs[0]⊃, baselineskiploc=⊂locs[1]⊃, parskiploc=⊂locs[2]⊃,
dispskiploc=⊂locs[3]⊃, topskiploc=⊂locs[4]⊃, botskiploc=⊂locs[5]⊃,
tabskiploc=⊂locs[6]⊃, dispaskiploc=⊂locs[7]⊃, dispbskiploc=⊂locs[8]⊃
	# allocation of the "loc" variables;
internaldef fontloc = hashsize+":" # eqtb location for \:;
internaldef xloc(x) = ⊂x⊃&"loc" # eqtb location for x;
internaldef eqlink(x) = ⊂field(link,eqtb[xloc(x)])⊃ # stored link field for x;

internal integer escapechar # set to the first character of user input;
comment This convention ensures that escapechar is a character the user can type;
comment The input stacks: inbuf,curbuf,state,loc,recovery,filename,parstack;

Comment TEX uses two different conventions for representing stacks.
	1) A sequential stack in which there is frequent access to the top
entry, and the stack is essentially never empty. Then the top entry is kept
in a global variable (even better would be a register), and the other entries
are in stack[0] thru stack[ptr-1]. Example: The main input stacks.
	2) A sequential stack with infrequent top access. Then the stack
contents are in stack[0] thru stack[ptr-1]. Example: The save stack.

The state of the scanning routine appears in the following stacks, maintained
with convention #1:;

internaldef stacksize=20 # maximum number of simultaneous input sources;
internal saf string array inbufstack[0:stacksize]; internal string inbuf
	# current lines being input from a character file;
internal saf string array curbfstack[0:stacksize]; internal string curbuf
	# the parts of inbuf that haven't yet been input;
internal saf string array filenmstack[0:stacksize]; internal string filename
	# the names of the current character files;
internal saf integer array statestack[0:stacksize]; internal integer state
	# current scanner state codes;
internal saf integer array locstack[0:stacksize]; internal integer loc
	# current scanner locations;
internal saf integer array recvrystack[0:stacksize]; internal integer recovery
	# information about what to do when done on each level;
comment The upper limit in these declarations is stacksize rather than stacksize-1
so that the dumpcontext routine doesn't cause embarrassing stack overflow;
internal integer inptr # first unused location in input stacks;

comment There are just four state codes:;
internaldef tokenlist=0 # scanning a token list;
internaldef midline=1 # scanning a line of characters;
internaldef skipblanks=1+charcodes # like midline but ignoring blanks;
internaldef newline=1+2*charcodes # beginning a new line of characters;

comment When the state specifies reading from an external character file (i.e.,
when state ≠ tokenlist), inbuf contains the current line, and curbuf contains
the remains of the current line as its characters are being lopped off.
String filename is the name of the file -- this is used only for printing error
messages and returning to the editor (cf. the error procedure in TEXSYS).
The loc contains page number and line number of the current line, in its
respective info and link fields. The channel number appears in recovery.
A null filename denotes input from the user terminal. (In this case loc and
recovery are not used, since such input never reaches the end-of-file.)

When the state specifies reading from an internal linked list of tokens
(i.e., state=tokenlist), inbuf and curbuf and filename are not used.
The loc points to the next token to be scanned, and recovery contains information
about what to do when reaching the end of the list. More precisely,
recovery contains
	-l, if nothing is to be done when the token list starting at l is exhausted
	-(1 lsh infod + l), same as -l but denotes vlist of an alignment
	-(2 lsh infod + l), if alignstate is to be set zero when the token
		list starting at l is exhausted (ulist of an alignment)
	+l, if the token list starting at l is to be destroyed upon completion
	l lsh infod + p, if the token list whose reference count is at l should be
		dereferenced and the parstack is to be pruned until parptr=p.

Macro parameters are kept on parstack, which grows at a different rate than
the others. This stack is maintained with convention #2;

internaldef parsize=13 # max number of simultaneous parameters;
internal saf integer array parstack[0:parsize-1] # token-list ptrs for parameters;
internal integer parptr # first unused location in parstack;
saf integer array pstack[0:parsize-1] # temporary storage for parameter pointers;

string pagewarning # when this string is non-null, the user's source file
	probably shouldn't contain any form-feeds (end-of-page marks);
integer pagewarnindex # reference to hash table for control sequence used
	in error message when giving a pagewarning;

comment One further aspect of the input state appears in the integer variable
alignstate. If this variable is zero, the input tokens ⊗ and \cr are
interrupted in the getnext procedure and procedure aligndelim is called --
the behavior in this case is something like a macro expansion, since ⊗ and \cr
are essentially replaced by the appropriate vlist in a alignment. Furthermore,
each lbrace and rbrace scanned will cause alignstate to be increased or decreased
by 1, respectively. The TEXSEM module explains alignment further;
comment Tokens, token lists, and the diagnostic routines dumplist,dumptokens;

comment A token is either a character or end-paragraph code or control
sequence found in some character file. Sometimes TEX considers tokens
to be a pair (cmd,char) of command and character, but sometimes it
considers these as a unit in packed form;

internaldef chars=9,chard=0 # definition of char field in packed tokens;
internaldef cmds=4,cmdd=chars # definition of cmd field in packed tokens;
comment The cmd field of a token never exceeds 15 (at least the way the codes
are now), and never equals carret. We must have hashsize+127 < 2↑chars;

comment Control sequence tokens are represented by the packed pair (0,hashentry)
where hashentry is the index in eqtb for the control sequence. Since 0 is the
cmd code for an escape character, there is no ambiguity, as an escape by
itself does not constitute a token.

A token list is a singly-linked list of one-word nodes, containing packed
tokens in their info fields. Macro definitions and output-routine definitions
and marks are stored as token lists preceded by a reference-count node.

Two special commands appear in the token lists of macro definitions:
	match [char=0 means match a parameter, char=1 means end of matching]
	outpar [output parameter number char+1].
The enclosing { and } of the right-hand side of a macro definition are omitted.
The final } of an output or mark definition is included in the tokenlist.

The following example macro definition illustrates these conventions:
	\def\mac a#1#2 \b {#1\:a ##1#2 #2}
is represented by a token list containing
	(ref ct), \mac, a, match0, match0, (space), \b, match1,
	outpar0, \:, a, (space), #, 1, outpar1, (space), outpar1.
Note that the macro name appears just after the reference count, this is
for error messages. Procedure macrodef builds such token lists, and
macrocall uses them.

Examples such as
	\def \m {\def \m {a} b}
explain why a reference counter is needed: The eqtb entry for \m is
changed before the token list for m has been consumed, hence we can't
simply destroy the token list when \m is redefined.

The procedure dumplist illustrates the above conventions. It is used
for diagnostic purposes;

internal saf string array tokstring[0:1] # output of dumplist;
internal procedure dumplist(integer p,q) # makes strings out of a token list;
begin comment This procedure is used for diagnostic messages. It creates two
strings from the token list pointed to by p, namely tokstring[0] for all
tokens up to but not including the one pointed to by q, and tokstring[1]
for the remaining tokens if any.  For example, if p points to the node \mac
in the above example and if q points to the second "a", the result will be
	tokstring[0]="\mac a#1#2 \b →#1\: "
	tokstring[1]="a ##1#2 #2".
No reference counters should be in the list pointed to by p. However, this
routine is intended to be robust in the sense that one can try it while
debugging just to see whether a particular memory location makes sense
if regarded as a token list;

integer j # 0 until q is reached, then 1;
integer cmd,char,t,npars; string s;

tokstring[0]←tokstring[1]←null; j←0; npars←"0";
while p do
	begin if p=q then j←1;
	if p<0 or p≥memsize then
		begin tokstring[j]←tokstring[j]&escapechar&"CLOBBERED"; done;
		end;
	t←info(p); cmd←field(cmd,t); char←field(char,t);
	case cmd of begin
	[0] begin string t;
	if char≥hashsize+128 then t←"IMPOSSIBLE" else t←idname(char);
	if length(t)=1 and chartype(t)≠letter then s←escapechar&t
	else s←escapechar&t&" " end;
	[match] if char=0 then s←"#"&(npars←npars+1) else s←"→";
	[outpar] s←"#"&cvs(char+1);
	[macprm] s←"##";
	[spacer] s←" ";
	[parend] s←escapechar&"par ";
	[endv] s←escapechar&"ENDV";
	[lbrace][rbrace][mathbr][tabmrk][supmrk][submrk][letter][otherchar] s←char;
	else s←escapechar&"BAD"
	  end;
	tokstring[j]←tokstring[j]&s;
	if length(tokstring[j])>500 then
		begin tokstring[j]←tokstring[j]&escapechar&"ETC"; done;
		end;
	p←link(p);
	end;
end;

internal string procedure dumptokens(integer p) # simple special case of dumplist;
begin dumplist(p,0); return(tokstring[0]);
end;
comment Maintaining the input stacks: pushinput,popinput,initin,dumpcontext,inslist;

internal simp procedure pushinput # save current input status on the stacks;
if inptr≥stacksize then overflow(stacksize) else
begin inbufstack[inptr]←inbuf;
curbfstack[inptr]←curbuf;
filenmstack[inptr]←filename;
statestack[inptr]←state;
locstack[inptr]←loc;
recvrystack[inptr]←recovery;
inptr←inptr+1;
end;

internaldef inslist(p)=⊂begin pushinput:state←tokenlist:loc←recovery←p end⊃;
comment The above inserts the tokenlist pointed to by p into the input stream
and sets things up so the token list is destroyed afterwards;

internal simp procedure insrclist(integer l) # like inslist for lists with
	reference counts;
begin pushinput; state←tokenlist;
recovery←(l lsh infod)+parptr; loc←link(l);
mem[l]←mem[l]+refct1;
end;

internal simp procedure popinput # finish input level, restore the previous;
begin integer t;
inptr←inptr-1;
inbuf←inbufstack[inptr];
curbuf←curbfstack[inptr];
filename←filenmstack[inptr];
state←statestack[inptr];
loc←locstack[inptr];
recovery←recvrystack[inptr];
end;

define crffbreak=1,ffbreak=2 # break table codes, see below;
internal integer brchar # break character stored by system input;
internal integer eof # end-of-file code stored by system input;

internal procedure initin # get TEX input system ready to start;
begin setbreak(crffbreak,'15&'14,null,"INA") # crffbreak will now read the
	input up to and including a carriage return or page mark,
	ignoring oldstyle line numbers;
setbreak(ffbreak,'14,null,"INS") # ffbreak is used only to read past a
	file directory page, it goes up to the first page mark;
inptr←0 # set input stacks empty;
escapechar←-1 # initially there is no control character defined;
state←newline;
inbuf←curbuf←filename←null;
recovery←0;
pagewarning←null;
nonewcontrolseq←false;
end;

internal string curfile # current input file name, set by dumpcontext;
internal integer curfpage,curfline # set by dumpcontext;

internal procedure dumpcontext # prints where the scanner is;
begin comment This procedure shows the top levels of input, omitting
tokenlists that are about to be flushed (since they were most likely
inserted with inslist), until coming to a level that is a character file;
label processtokens # go here to process tokenlist levels of input;
integer ptr,t; string lf; lf←'12 # line-feed symbol;
ptr←inptr;
inbufstack[ptr]←inbuf;
curbfstack[ptr]←curbuf;
filenmstack[ptr]←filename;
statestack[ptr]←state;
locstack[ptr]←loc;
recvrystack[ptr]←recovery;
processtokens: while statestack[ptr]=tokenlist do
	begin label advance;
	if(t←recvrystack[ptr])<0 then
		begin print(nextline,
		case((-t) lsh -infod) of ("<argument> ","<vlist> ","<ulist> "));
		t←(-t)land((1 lsh infod)-1);
		end
	else if(t←field(info,t))then
		begin comment macrocall or output routine or mark;
		t←link(t) # bypass reference count;
		print(nextline);
		end
	else if locstack[ptr] then
		begin print(nextline,"<to be read again> ");
		t←recvrystack[ptr];
		end
	else go to advance # tokenlist to be flushed, won't be dumped;
	dumplist(t,locstack[ptr]);
	if length(tokstring[0])>32 then print("...");
	print(tokstring[0][∞-31 to ∞],lf,tokstring[1][1 to 32]);
	if length(tokstring[1])>32 then print("...");
advance: ptr←ptr-1;
	end;
curfile←filenmstack[ptr];
curfpage←field(info,locstack[ptr]);
curfline←field(link,locstack[ptr]);
if curfile then print(nextline,"p.",curfpage,",l.",curfline," ")
else print(nextline,"(*) ");
if inbufstack[ptr] = '12 then t←2 else t←1 # ignore initial linefeed;
print(inbufstack[ptr][t to (∞-length(curbfstack[ptr]))],lf,
	curbfstack[ptr]);
if curfile=0 and ptr then
	begin comment this level is an online insertion;
	ptr←ptr-1; go to processtokens;
	end;
print(nextline);
end;
comment The basic input procedure getnext and its cousins gettok,getncnext,getnctok;

integer q # pointer to current node in macrocall procedure, used in error message;
integer itm # next item to store in macrocall procedure, used in error message;
procedure page_end_error # gives error message when page ended unexpectedly;
begin deletions_allowed←false # prevents possible recursion;
if pagewarning="u" then
	begin comment "use of";
	mem[q]←itm lsh infod;
	print(nextline,"Runaway argument?");
	print(nextline,dumptokens(mem[temphead]));
	end;
curbuf←inbuf;
error("Input page ended while scanning "&pagewarning&" "&escapechar
	&idname(pagewarnindex));
deletions_allowed←true;
end;

internal simp procedure getnext # sends next input token to curcmd,curchar;
begin comment This procedure changes the value of hashentry if and only if the
next input token is a control sequence (and, if so, hashentry is the eqtb
location.) Although this procedure has to handle a lot of cases, note that
its inner loop is reasonably short and fast;
label switch; integer t,p;
switch: if state≠tokenlist then
	begin comment reading an external file;
	label innerswitch;
innerswitch:if(curchar←lop(curbuf))then
	case state+(curcmd←chartype(curchar)) of begin
	comment Now curcmd and curchar are set, but we may have to do special
		actions. This case statement tells what to do for each
		combination of state and curcmd, except when there's nothing to do;
	[midline+spacer] begin state←skipblanks; curchar←'40 end;
	[midline+carret] begin state←newline;curbuf←null;curcmd←spacer;
	curchar←'40 end;

	[midline+ignore][skipblanks+ignore][skipblanks+spacer][newline+ignore]
	[newline+spacer] go to innerswitch # ignore the character;

	[midline+escape][skipblanks+escape][newline+escape] begin controlseq;
	t←eqtb[hashentry];curcmd←field(idcmd,t);curchar←field(link,t);
	state←skipblanks end;

	[skipblanks+mathbr][skipblanks+tabmrk]
	[skipblanks+macprm][skipblanks+supmrk][skipblanks+submrk][skipblanks+letter]
	[skipblanks+otherchar][newline+mathbr]
	[newline+tabmrk][newline+macprm][newline+supmrk][newline+submrk]
	[newline+letter][newline+otherchar] state←midline;

	[skipblanks+lbrace][newline+lbrace] begin alignstate←alignstate+1;
	state←midline end;

	[skipblanks+rbrace][newline+rbrace] begin alignstate←alignstate-1;
	state←midline end;

	[midline+lbrace] alignstate←alignstate+1;
	[midline+rbrace] alignstate←alignstate-1;

	[skipblanks+carret] begin state←newline;curbuf←null;go to innerswitch end;
	[newline+carret] begin curbuf←null; curcmd←parend; curchar←0 end;
	else comment do nothing;
	  end
	else	begin comment curbuf is empty, must go to next line of file;
		if filename then
			begin comment reading a character file;
			inbuf←input(recovery,crffbreak) #
				read file up to carriage return or form feed;
			if eof then
				begin comment done with reading a file;
				inbuf←null;
				print(")");
				release(recovery) # deactivate the channel;
				if pagewarning then page_end_error;
				popinput # restore previous status;
				curcmd←parend; curchar←0;
				return # end-of-file ends a paragraph;
				end;
			if brchar=0 then
				begin comment Input line more than 150 chars long;
				integer p,l; p←field(info,loc); l←field(link,loc);
				print(nextline,
				"Warning: Long input line has been broken.",
				nextline,"p.",p,",l.",l+1,":",inbuf);
				loc←loc-1 # compensates for loc←loc+1 below;
				end;
			if tracing land '20 then
				begin if inbuf='12 then p←lop(inbuf);
				if length(inbuf)=1 then inbuf←" "&inbuf;
				print(nextline);
				ifc SUAI or MIT thenc ptostr(0, elsec outstr( endc
					inbuf[1 to ∞-1]) # show inbuf on screen;
				inbuf←inchwl&inbuf[∞ to ∞];
				end;
			if brchar='14 then
				begin comment page mark,inbuf can be ignored;
				p←field(info,loc)+1 # advance page number;
				print(" ",p) # print progress report for user;
				loc ← p lsh infod # reset line number to zero;
				if pagewarning then page_end_error;
				end
			else loc←loc+1 # advance line number;
			comment No attempt is made here to remember the line
				numbers on old style editing systems;
			end
		else if inptr then
			begin comment done with line inserted during error routine;
			popinput; go to switch;
			end
		else	begin comment reading online from terminal;
			print(nextline,"*") # prompt user for input;
			inbuf←inchwl&'15 # append carriage-return deleted by system;
			setprint(null,"F");print(inbuf);setprint(null,"B") #
				echo the input on ERRORS.TMP file for the record;
			if escapechar<0 and (inbuf≠'15) then
				begin escapechar←inbuf # first char input is the \;
				chartype(escapechar)←escape;
				end;
			end;
		curbuf ← inbuf;
		go to innerswitch;
		end
	end
else	begin comment traversing a tokenlist;
	if loc then
		begin t←info(loc) # get token to emit;
		loc←link(loc) # advance to next element of token list;
		curchar←field(char,t);

		case (curcmd←field(cmd,t)) of begin
		[0] begin comment control sequence in token list;
		hashentry←curchar; t←eqtb[curchar];
		curcmd←field(idcmd,t);curchar←field(link,t) end;
		[outpar] begin comment insert a macro parameter;
		pushinput;
		loc←parstack[field(link,recovery)+curchar];
		recovery←-loc;
		comment The state remains at tokenlist;
		go to switch end;
		[lbrace] alignstate←alignstate+1;
		[rbrace] alignstate←alignstate-1;
		else comment do nothing;
		  end

		end
	else	begin comment end of tokenlist;
		if recovery>0 then
			begin if recovery < (1 lsh infod) then dslist(recovery)
			else	begin t←field(info,recovery);
				comment end of macro body, t points to its refcount;
				delrclink(t);
				t←field(link,recovery) # now t is desired parptr;
				while parptr>t do
					begin parptr←parptr-1;
					dslist(parstack[parptr]);
					end;
				end;
			end
		else if recovery≤-(2 lsh infod) then alignstate←0;
		popinput; go to switch;
		end;
	end;
if alignstate=0 and (curcmd=tabmrk or curcmd=carret) then
	begin aligndelim; hashentry←-1; go to switch;
	end;
end;

comment Three other routines are often used instead of getnext, namely:
	gettok, which not only sets curcmd and curchar but also "curtok",
		a packed version of the corresponding input token.
	getncnext, meaning get non-call, which is like getnext but
		if the current token is a user-defined control sequence
		(i.e., a macro call) it is eliminated from the input.
	getnctok, like getncnext but also sets curtok.

The gettok routine has a special test built in to make sure that the token
found is not "endv", since this would be a bad case of misalignment (we
wouldn't want this endv to infiltrate another token list, and gettok is
used only when building token lists). There's also a special test that
converts \par to "parend", so that macro parameter-matching works better;

internal integer curtok # current token set by gettok and getnctok;
internal integer hashpar # address of \par in the hash table;

internal simp procedure gettok # set curcmd, curchar, and curtok;
begin hashentry←-1;
getnext;
if hashentry<0 then curtok←(curcmd lsh cmdd)+curchar 
else if hashentry=hashpar then curtok←(parend lsh cmdd) else curtok←hashentry;
if curcmd=endv then
	begin backerror("Missing "&closegroupchar&" inserted"); curcmd←rbrace;
	end;
end;

internal simp procedure getncnext # get next non-call input token;
while true do
	begin nonewcontrolseq←true; getnext; nonewcontrolseq←false;
	if curcmd=0 then error("Undefined control sequence")
	else if curcmd≠call then return
	else macrocall;
	end;

internal simp procedure getnctok # get next non-call token and set curtok;
while true do
	begin hashentry←-1;
	nonewcontrolseq←true; getnext; nonewcontrolseq←false;
	if hashentry<0 then
		begin curtok←(curcmd lsh cmdd)+curchar; return;
		end
	else if hashentry=hashpar then
		begin curtok←(parend lsh cmdd); return;
		end
	else	begin curtok←hashentry;
		if curcmd=0 then error("Undefined control sequence")
		else if curcmd≠call then return
		else macrocall;
		end;
	end;
comment Defining user control sequences and output routines: macrodef,scantoks;

internal procedure macrodef(integer gdef);
begin comment "\def" or "\gdef" or "\xdef" has just been scanned.
This procedure scans the macro definition and constructs the corresponding token
list as described earlier;
integer npars # number of parameters (as ascii character);
integer p # pointer to previous node in linked list;
integer q # pointer to current node in linked list;
integer itm # current entry to be appended to linked list;
define storeitem=⊂begin p←q; getavail(q);mem[p]←(itm lsh infod)+q;end⊃ #
	stores the previous item and makes it point to the current node;
integer unbal # count of {'s minus }'s in right-hand side of definition;
integer defplace # eqtb entry to define;
integer listhead # pointer to reference counter at the beginning of the list;
label finishup # the definition has been scanned;
label storedef # the definition should be stored in eqtb;

gettok;
if (defplace←hashentry)<0 then
	begin backerror("You can only define a control sequence"); return;
	end;
if curcmd=parend or (curcmd≥assignglue and curcmd≤font) then
	begin error("You can't redefine this control sequence"); return;
	end;
getavail(listhead);
getavail(q); mem[listhead]←q # initialize reference counter;
itm ← defplace # first entry on list will point back to the eqtb;
if gdef=2 then
	begin comment \xdef; curcmd←def; p←q; q←scantoks;
	mem[p]←(itm lsh infod)+q; mem[q]←mem[q]+((1+(match lsh cmdd))lsh infod);
	go to storedef;
	end;
npars←"0" # number of parameters seen so far;
while true do
	begin gettok # set curcmd, curchar, curtok;
	storeitem # store previous item and make room for a new one;
	if curcmd=lbrace or curcmd=rbrace then done;
	if curcmd≠macprm then itm←curtok
	else	begin comment a new parameter to be matched when this macro called;
		gettok; if curchar≠(npars←npars+1) or curcmd≠otherchar then
			backerror("Parameters must be numbered consecutively");
		if npars>("0"+parsize) then overflow(parsize);
		comment The previous statement guarantees that pstack, in
			the macrocall procedure, will never overflow;
		itm←match lsh cmdd # store a match0 command;
		end;
	end;
itm←(match lsh cmdd)+1 # store a match1 command;
if curcmd=rbrace then
	begin alignstate←alignstate+1; error("Missing { has been inserted");
	go to finishup;
	end;

comment Now curcmd=lbrace, scan the right-hand side;
unbal←1;
pagewarning←"def of"; pagewarnindex←defplace;
while true do
	begin gettok;
	if curcmd=rbrace then
		begin unbal←unbal-1;
		if unbal=0 then done;
		end
	else if curcmd=lbrace then unbal←unbal+1;
	storeitem;
	if curcmd≠macprm then itm←curtok
	else	begin comment "#" sensed, look for two in a row;
		gettok;
		if curcmd≠macprm then
			begin comment not two in a row, means parameter output;
			if curchar>npars or curchar<"1" then
				begin backerror("Illegal parameter number in "&
				"definition of "&escapechar&idname(defplace));
				itm←curtok # treat as ##;
				end
			else itm←((outpar lsh cmdd)-"1")+curchar;
			end
		else itm←curtok;
		end;
	end;

finishup: comment Now the definition has been scanned, and itm contains
the final token to be stored;
mem[q]←itm lsh infod;
pagewarning←null;
storedef: if gdef then
	begin setufield(idlev,eqtb[defplace],level1);
	q←curlev; curlev←level1 # temporarily switch to level 1;
	end;
eqdefine(defplace,call,listhead) # set eqtb entry;
if gdef then curlev←q;
getnctok; if curcmd≠spacer then backinput # optional space after the definition;
end;

internal integer procedure scantoks # build tokenlist for output and mark, etc.;
begin comment "\output" or "\mark" or "\uppercase" or "\lowercase" or "\xdef\cs"
has just been scanned. This procedure builds a token list somewhat like the
token list of a macro definition, but without parameters, and including the final
} (for output and mark) but not the initial { of the token group,
then it returns a pointer to the reference count heading this list.
Macros are expanded in \mark and \xdef;
integer cur # mark or output or caseshift or def;
integer p # pointer to previous node in linked list;
integer q # pointer to current node in linked list;
integer itm # current entry to be appended to linked list;
define storeitem=⊂begin p←q; getavail(q);mem[p]←(itm lsh infod)+q;itm←curtok end⊃ #
	stores the previous item and makes it point to the current node;
integer unbal # count of {'s minus }'s in right-hand side of definition;
integer listhead # pointer to reference counter at the beginning of the list;
integer curhash # pointer to control sequence being defined;

cur←curcmd; curhash←hashentry;
pagewarning←"def of"; pagewarnindex←hashentry;
scanlb # check for the left brace;
getavail(listhead); q←listhead; itm←0 # initialize reference counter;
unbal←1;
while true do
	begin if cur=mark or cur=def then
		begin getnctok;
		pagewarning←"def of"; pagewarnindex←curhash;
		if cur=def and curcmd=count then
			begin insnum(kount[scandigit]); gettok;
			end;
		end
	else gettok;
	storeitem;
	if curcmd=rbrace then
		begin unbal←unbal-1;
		if unbal≤0 then done;
		end
	else if curcmd=lbrace then unbal←unbal+1;
	end;
pagewarning←null;
if cur=def or cur=caseshift then
	begin mem[p]←mem[p] land (-1 lsh infod); freeavail(q);
	end
else	begin mem[q]←itm lsh infod # store final rbrace;
	getnctok;
	if curcmd≠spacer then backinput # allow optional space after output,mark;
	end;
return(listhead);
end;
comment Calling user macros: macrocall;

internal procedure macrocall # invoke a user-defined control sequence;
begin comment "\mac" has just been scanned, where \mac is a control sequence
previously defined with \def. The body of its definition is a tokenlist
beginning with the reference counter in location curchar,
and it has the form described above in the discussion of token lists.
This procedure first scans to find the parameters, placing them in the
auxiliary stack pstack (since the parstack may be losing entries during
this matching process). Then the parameters are placed on parstack and
the right-hand side of the macro body is fed to the scanner;

integer refcount # points to the reference count;
integer defplace # points to the index of \mac in eqtb;
integer npars # number of parameters scanned;
integer p # pointer to previous node in linked list;
comment integer q # pointer to current node in linked list
	(used also in page_end_error);
comment integer itm # current entry to be appended to linked list
	(used also in page_end_error);
define storeitem=⊂begin p←q; getavail(q);mem[p]←(itm lsh infod)+q;end⊃ #
	stores the previous item and makes it point to the current node;
integer unbal # count of {'s minus }'s in parameter being matched;
integer ngrps # number of tokens or {} groups in parameter being matched;
integer prevcmd # final cmd of parameter;
integer r # pointer to current node in macro body;
integer t # current token of interest;
boolean firsterror # no errors noticed yet in this macro call;

firsterror←true;
defplace←hashentry;
r←link(link(refcount←curchar)) # point to first itm after \mac token;
comment defplace should equal info(link(refcount));
npars←0;
pagewarning←"use of";pagewarnindex←defplace;
if tracing land '10 then print(nextline,dumptokens(link(refcount))) # tracing calls;

while (t←info(r))≠((match lsh cmdd)+1) do
	begin q←temphead # mem[temphead] will point to tokenlist created;
	r←link(r);
	if t ≠ match lsh cmdd then
		begin comment input must match token t;
		gettok;
		if curtok≠t and firsterror then
			begin firsterror←false;
			error("Use of "&escapechar&idname(defplace)&
				" doesn't match its definition");
			end;
		end
	else	begin "findparameter";
		if ufield(cmd,t←info(r)) = match lsh cmdd then
			begin comment undelimited parameter;
			t←-1;
			end
		else	begin comment parameter delimited by t;
			r←link(r);
			end;
		itm←0;
		ngrps←0;
		gettok # set curtok to next input token;
		while curcmd=rbrace do
			begin alignstate←alignstate+1;
			error("Argument of "&escapechar&idname(defplace)&
				" can't begin with "&closegroupchar);
			gettok;
			end;
		while curtok≠t do
			begin storeitem; itm←curtok;
			if curcmd=lbrace then
				begin comment scan a {} group;
				unbal←1;
				while true do
					begin gettok;
					storeitem; itm←curtok;
					if curcmd=rbrace then
						begin unbal←unbal-1;
						if unbal=0 then done;
						end
					else if curcmd=lbrace then unbal←unbal+1;
					end;
				end;
			ngrps←ngrps+1;
			prevcmd←curcmd;
			if t<0 then done else gettok;
			end;
		if ngrps=1 and prevcmd=rbrace then
			begin comment strip off enclosing braces;
			mem[p]←mem[p] land (-1 lsh infod) # zero the link field;
			freeavail(q);
			pstack[npars]←link(mem[temphead]);
			freeavail(mem[temphead]);
			end
		else	begin comment attach final symbol to list;
			mem[q]←itm lsh infod;
			pstack[npars]←mem[temphead];
			end;
		if tracing land '10 then print(nextline,"#"&(npars+"1"),"←",
			dumptokens(pstack[npars])) # tracing macro calls;
		npars←npars+1;
		end "findparameter";
	end;

comment Now matching and parameter building are complete, and link(r) points
to the right-hand side of the macro definition;
pagewarning←null;
if parptr+npars>parsize then overflow(parsize);
for q←0 thru npars-1 do parstack[parptr+q]←pstack[q];
pushinput # prepare to insert macrobody in input;
loc←link(r);
state←tokenlist;
recovery←(refcount lsh infod)+parptr;
parptr←parptr+npars;
mem[refcount]←mem[refcount]+refct1 # increase reference count;
end;
comment Accessing user's files: scanfilename, inputfile, definefont;

comment This page contains the most operating-system dependent aspects
of the TEX input system;

comment If the site is not SUAI or MIT then the file "TEXFIL.SAI" should
	contain appropriate code for the abovementioned three procedures;

internal saf string array fontname[0:31] # user name for each font code;

ifc SUAI or MIT thenc

IFSUAI
saf string array fname[0:2] # file name, extension, and directory;
simp procedure scanfilename # sets up fname[0:2];
begin integer j;
fname[0]←fname[1]←fname[2]←null;
j←0;
while true do
	begin getnctok;
	if curcmd = spacer then done;
	if curcmd≥charcodes then
		begin backerror("Blank space should follow file name"); done;
		end;
	if curchar = "." then j←1
	else if curchar = "[" then j←2;
	fname[j]←fname[j]&curchar;
	end;
end;
ENDSUAI

IFMIT
string fnamedir, fname1, fname2;
simp procedure scanfilename # sets up file name in fnamedir, fname1, fname2;
begin
    string fcomp;           # file name component;
    boolean firstchar, vbar;
    fcomp←fnamedir←fname1←fname2←null;
    firstchar←true;
    vbar←false;
    while true do
	    begin
		getnctok;
		if firstchar and curcmd = spacer then done;
		if curcmd≥charcodes then
			begin backerror("Blank space should follow file name"); done;
			end;
		if curcmd = spacer then
		    begin
			if fcomp≠null then
				if fname1=null then fname1←fcomp else fname2←fcomp;
			fcomp←null;
			if not vbar then done
		    end
		else if curchar = "|" then
			if firstchar then vbar←true else done
		else if curchar = ";" then
		    begin
			if fcomp≠null then fnamedir←fcomp;
			fcomp←null
		    end
		else fcomp←fcomp&curchar;
		firstchar←false;
	    end;
    if fcomp≠null then
	    if fname1=null then fname1←fcomp else fname2←fcomp;
    if fnamedir≠null then fnamedir←fnamedir&";";
    if fname1=null then fname1←"@";
end;
ENDMIT

internal procedure inputfile;
begin comment "\input" has just been scanned. This procedure scans the user's
file name, employing the appropriate operating system naming conventions,
then reads in the first line and feeds it to the input system;
integer chan;
label abort # if something goes wrong trying to read the file;
label retry # go here to try again;
boolean firsttry # first attempt to read the file;
string flname;
integer pageno # number of pages successfully read;
define checkeof=⊂if eof then begin print(")");go to abort end⊃;
firsttry←true; retry:
scanfilename;
IFSUAI
	if fname[1]=0 then fname[1]←".TEX";
	flname←fname[0]&fname[1]&fname[2];
ENDSUAI
IFMIT
	if fname2=null then fname2←">";
	flname←fnamedir&fname1&" "&fname2;
ENDMIT
open(chan←getchan,"DSK",0,if inptr=0 then 19 else 2, 0,
	150,brchar,eof);
comment On the SAIL system, 19 buffers is the most efficient for disk files;
comment The lines read in must have at most 150 characters;
lookup(chan,flname,eof);
IFSUAI
if eof then lookup(chan,fname[0]&fname[1]&"[1,3]",eof);
ENDSUAI
IFMIT
if eof then lookup(chan,"TEX;"&fname1&" "&fname2,eof);
ENDMIT
if eof then
	begin error("Lookup failed on file "&flname);
	if firsttry then
		begin firsttry←false; release(chan); go to retry;
		end;
	go to abort;
	end;
print(" (",flname);
pushinput # save present file status;
state←newline; recovery←chan; filename←flname;
inbuf←input(chan,crffbreak) # get first line of file;
checkeof; print(" 1");
if equ(inbuf[1 to 9],"COMMENT ⊗") then
	begin comment Skip TVedit directory page;
	while brchar≠'14 and not eof do inbuf←input(chan,ffbreak);
	checkeof;
	inbuf←input(chan,crffbreak) # get first line of second page;
	checkeof; print(" 2");
	pageno←2;
	end
else pageno←1;
while brchar='14 do
	begin comment Ignore empty pages at beginning of file;
	inbuf←input(chan,crffbreak); checkeof; pageno←pageno+1; print(" ",pageno);
	end;
loc ← (pageno lsh infod) + 1 # line 1 of the current page;
if tracing land '20 then
	begin integer p # garbage bin;
	if inbuf='12 then p←lop(inbuf);
	if length(inbuf)=1 then inbuf←" "&inbuf;
	print(nextline);
	ptostr(0,inbuf[1 to ∞-1]) # show inbuf on screen;
	inbuf←inchwl&inbuf[∞ to ∞];
	end;
curbuf←inbuf;

comment Now define the output file name if it hasn't yet been defined;
IFSUAI
if ofilname=0 then declareofil(fname[0]&ofilext&fname[2]);
ENDSUAI
IFMIT
if ofilname=0 then declareofil(fnamedir&fname1&" "&ofilext);
ENDMIT
return;

abort: release(chan);
popinput;
end;

internal procedure definefont(integer f) # Do this after seeing "=" of font def;
begin
    integer n,p,chan; string s;
    label retry # go here to try again;
    boolean firsttry # first attempt to read the file;
    firsttry←true; retry:
    scanfilename;
IFSUAI
    if fname[2]=0 then fname[2]←libraryarea;
    fontname[f]←fname[0]&fname[2];
ENDSUAI
IFMIT
    if fnamedir=null then fnamedir←libraryarea;
    fontname[f]←fnamedir&fname1&" "&fname2;
ENDMIT
    if parbase[f]=0 then
	    begin comment font information not preloaded;
	    open(chan←getchan,"DSK",8,2,0,0,0,eof);
IFSUAI
	    lookup(chan,s←fname[0]&deviceext&fname[2],eof);
ENDSUAI
IFMIT
	    lookup(chan,s←fnamedir&fname1&" "&deviceext,eof);
ENDMIT
	    if eof then
		    begin error("Lookup failed on file "&s);
IFSUAI
		    if firsttry then
			    begin firsttry←false; release(chan); go to retry;
			    end;
ENDSUAI
		    quit;
		    end;
	    readfontinfo(chan,f) # input font info for use by TEXSEM and TEXOUT;
	    release(chan);
	    end;
    p←fontglue+f*gluespecsize # location of "font glue";
    mem[p]←1000000 lsh infod # "infinite" reference count;
    gluespace(p)←fontpar(f,spacewd);
    gluestretch(p)←fontpar(f,spacestr);
    glueshrink(p)←fontpar(f,spaceshr);
end;
elsec require "TEXFIL.SAI" source_file; endc

comment Basic scanning routines: backinput,scandigit,scanlb,scanstring,scannumber;

internal simp procedure backinput # puts curtok back into the input;
begin comment When using this procedure, be sure to have called gettok or getnctok
instead of getnext or getncnext;
integer p;
getavail(p);
mem[p]←curtok lsh infod # create a tokenlist of length 1;
if curcmd=lbrace then alignstate←alignstate-1
else if curcmd=rbrace then alignstate←alignstate+1;
inslist(p);
end;

internal integer simp procedure scandigit # scans "0"..."9";
begin comment If the next input token is a digit, this procedure returns that
digit (in ascii code). Otherwise this procedure gives an error message and
returns "0";
integer d;
getnctok; d←curchar;
if curtok<(otherchar lsh cmdd)+"0" or curtok>(otherchar lsh cmdd)+"9" then
	begin backerror("Missing digit (0 to 9), 0 inserted");
	d←"0";
	end;
getnctok; if curcmd≠spacer then backinput;
return(d);
end;

internal simp procedure scanlb # scans {;
begin comment If the next input token is not a left brace delimiter, 
this procedure gives an error message. Routines using this procedure
assume that a left brace is present;
getnctok;
if curcmd≠lbrace then
	begin alignstate←alignstate+1; backerror("Missing { inserted");
	end;
end;

internal boolean procedure scanstring(string s) # scans a given letter string;
begin comment Here s is a string of letters. This procedure returns
true and removes s if the next characters of the input agree with s,
otherwise it returns false and effectively leaves the input unchanged;
string ss; integer c,q,p,head;
ss←s;
while c←lop(ss) do
	begin getnctok;
	if curtok≠(letter lsh cmdd)+c then
		begin comment match failed, we construct a token string to insert;
		getavail(q); head←q;
		while length(s)≠length(ss)+1 do
			begin p←q; getavail(q);
			mem[p]←((lop(s)+(letter lsh cmdd))lsh infod)+q;
			end;
		mem[q]←curtok lsh infod;
		if curcmd=lbrace then alignstate←alignstate-1
		else if curcmd=rbrace then alignstate←alignstate+1;
		inslist(head);
		return(false);
		end;
	end;
return(true);
end;

internal integer nbrlength # length of scanned number;
internal integer nbrsign # sign, if any, preceding scanned number;

internal simp integer procedure scannumber # scans a decimal or octal number;
begin comment This procedure removes from the input a string of the form
	space* [+ space* | - space*] {['] digit* | \count digit} [space]
where ' denotes octal radix, and returns the corresponding decimal or octal
value of the digit string. Global variable nbrlength is set to the
number of digits, and nbrsign is set to "+" or "-" if a sign appeared;
integer n,radix;
n←nbrsign←nbrlength←0;
do getnctok until curcmd≠spacer;
if curtok=(otherchar lsh cmdd)+"+" or curtok=(otherchar lsh cmdd)+"-" then
	begin nbrsign←curchar;
	do getnctok until curcmd≠spacer;
	end;
if curcmd=count then
	begin n←kount[scandigit]; getnctok;
	end
else	begin
	if curtok≠(otherchar lsh cmdd)+"'" then radix←10
	else	begin radix←8; getnctok;
		end;
	while curtok≥(otherchar lsh cmdd)+"0" and curtok≤(otherchar lsh cmdd)+"9" do
		begin n←radix*n+curchar-"0";
		nbrlength←nbrlength+1;
		getnctok;
		end;
	end;
if curcmd≠spacer then backinput;
return(n);
end;
comment Further scanning routines: scanlength,scanposlength,scanglue,scanspec;

internal real procedure scanlength # scans a dimen specification;
begin comment This procedure scans the input for
	<number> [. <number>] <unit> [space]
and returns the corresponding value in points;
comment If the number after the decimal point is octal or signed,
no error is detected but the result may be unusual;
integer n; real x,sign;
x←scannumber;
if nbrsign="-" then sign←-1.0 else sign←+1.0;
getnctok;
if curtok=(otherchar lsh cmdd)+"." then
	begin n←scannumber;
	x←x+n/10.0↑nbrlength;
	end
else backinput;
ifc SUAI or MIT thenc
if scanstring("xgp") then x←x*(200.0*0.013837/xgpconv) # for Stanford XGP only;
endc
IFPARC
if scanstring("true") then x←x/rfudge # for PARC printers only;
ENDPARC
if scanstring("pt") then comment already in points;
else if scanstring("in") then x←x/0.013837
else if scanstring("pc") then x←x*12.0
else if scanstring("cm") then x←x/(0.013837*2.54)
else if scanstring("mm") then x←x/(0.013837*25.4)
else if scanstring("dd") then x←x*(1.0/(26.6*2.54*0.013837))
else if scanstring("vu") then x←x*pagemem[varunitmem]
else if scanstring("em") then
	begin integer curfont; curfont←eqlink(font);
	if curfont≤'37 then x←x*fontpar(curfont,quad);
	end
else error("Illegal unit of measure (pt inserted)");
getnctok; if curcmd≠spacer then backinput;
return(x*sign);
end;

real procedure scanposlength # scans a dimen, gives error if negative;
begin real r; r←scanlength;
if r>0 then return(r);
if r<0 then error("This dimension shouldn't be negative"); return(epsilon);
end;

internal integer procedure scanglue # scans a glue specification;
begin comment This procedure scans the input for
	<length> [plus <length>] [minus <length>]
and returns a pointer to a new glue node having these parameters;
integer p; 
p←getnode(gluespecsize);
gluespace(p)←scanlength;
if scanstring("plus") then gluestretch(p)←scanlength;
if scanstring("minus") then glueshrink(p)←scanlength;
return(p);
end;

internal procedure scanspec # scans a justification specification and a {;
begin comment
	  If the input is		  then this procedure puts on savestack
	to [space] size space* {	hsize or vsize (acc. to current mode), 0
	to <length> space* {		value(<length>), 0
 	expand <length> space* {	value(<length>), 1
	space* {				0, 1
	par [space] size space* {		hsize, 2  (mode=-hmode only)
	par <length> space* {		value(<length>), 2  (mode=-hmode only);
real v; integer c;
if scanstring("to") then
	begin getnctok; if curcmd≠spacer then backinput;
	if scanstring("size") then
		if mode=-vmode then v←pagemem[vsizemem]
		else v←pagemem[hsizemem]
	else v←scanlength;
	c←0;
	end
else if mode=-hmode and scanstring("par") then
	begin getnctok; if curcmd≠spacer then backinput;
	if scanstring("size") then v←pagemem[hsizemem]
	else v←scanlength;
	c←2;
	end
else	begin c←1; if scanstring("expand") then v←scanlength else v←0;
	end;
do getnctok until curcmd≠spacer;
if curcmd≠lbrace then
	begin alignstate←alignstate+1; backerror("Missing { inserted");
	end;
savestack[saveptr]←memory[location(v),integer];
savestack[saveptr+1]←c;
saveptr←saveptr+2 # It's not necessary to check for stack overflow here;
end;
comment Additional scanning routines: scanfont,scandelim,scanrulespec;

internal simp integer procedure scanfont # scan a font code;
begin comment This procedure scans a font letter code, having the
following syntax:
	<char> [<space>]
if this font character has already been defined,
	<char> < ← | = > <font file name>
otherwise;
integer f;
while true do
	begin getnctok; f←curtok land '37;
	if curcmd<charcodes then done;
	backerror("Illegal font code");
	end;
getnctok;
if fontname[f] then
	begin if curcmd≠spacer then backinput # ignore space after font codename;
	end
else	begin comment new font code;
	while curchar≠"=" and curchar≠ ifc MIT thenc "_" elsec "←" endc do
		begin backerror("First use of font must define it"); getnctok;
		end;
	definefont(f);
	end;
return(f);
end;

internal saf integer array delimtable[0:127] # contains 18-bit delimiter codes
	for all known delimiters, or -1 for nondelimiters;

internal integer procedure scandelim # scans a math delimiter;
begin comment This procedure scans a delimiter and returns the 18-bit
delimiter code according to the math mode conventions described in TEXSEM;
label unknown;
getnctok;
if curcmd=otherchar then
	if delimtable[curchar]≥0 then return(delimtable[curchar])
	else go to unknown;
if curcmd=mathonly then
	begin curchar←curchar land '777;
	if curchar≥'542 and curchar≤'553 then return(curchar*'1001+('604-'542))
	else go to unknown;
	end;
if curcmd=ascii then return(scannumber land '777777);
unknown: backerror("Unknown delimiter"); return(0);
end;

internal integer procedure scanrulespec # scans rule dimensions;
begin comment This procedure is called just after \hrule or \vrule was sensed,
it returns a pointer to corresponding rule node;
integer p; label loop;
p←getnode(rulenodesize);
mem[p]←rulenode lsh typed;
if curcmd=hrule then begin width(p)←-1.0; height(p)←0.4 end
else begin width(p)←0.4; height(p)←depth(p)←-1.0 end;
loop: if scanstring("width") then width(p)←scanposlength;
if scanstring("height") then begin height(p)←scanposlength; go to loop end;
if scanstring("depth") then begin depth(p)←scanposlength; go to loop end;
return(p);
end;
comment Still more scanning routines: passblock,insnum,scancond;

internal procedure passblock # scans past an entire {} block and optional space;
begin integer unbal;
unbal←0;
while true do
	begin gettok;
	if curcmd=rbrace then
		begin unbal←unbal-1;
		if unbal≤0 then done;
		end
	else if curcmd=lbrace then unbal←unbal+1;
	end;
if unbal<0 then
	begin alignstate←alignstate+1; error("Missing { inserted");
	end;
getnctok; if curcmd≠spacer then backinput;
end;

preload_with 1000,500,100,50,10,5,1; saf integer array romval[1:7];
define lt(x)=⊂((letter lsh cmdd)+"x")lsh infod⊃;
preload_with lt(m),lt(d),lt(c),lt(l),lt(x),lt(v),lt(i);
saf integer array romtok[1:7];

internal procedure insnum(integer n) # puts string version of n into input;
begin comment if n is negative, the Roman numeral value of n is placed
into the input stream, otherwise the decimal value of n is placed there;
integer p,q;
if n≥0 then
	begin comment decimal number, build tokenlist from right to left;
	p←0;
	do	begin getavail(q);
		mem[q]←(((n mod 10)+("0"+(otherchar lsh cmdd)))lsh infod)+p;
		p←q; n←n div 10;
		end until n=0;
	end
else	begin comment roman numeral, build tokenlist from left to right;
	integer itm,j,k;
	p←temphead; itm←0;
	j←1; n←-n;
	while true do
		begin while n≥romval[j] do
			begin getavail(q); mem[p]←itm+q;
			p←q; itm←romtok[j];
			n←n-romval[j];
			end;
		if n=0 then done;
		k←j+1+(j land 1) # m,d → c	c,l → x		x,v → i;
		if n+romval[k]≥romval[j] then
			begin getavail(q); mem[p]←itm+q;
			p←q; itm←romtok[k];
			n←n+romval[k];
			end
		else j←j+1;
		end;
	mem[p]←itm;
	p←mem[temphead] # p points to the tokenlist;
	end;
inslist(p);
end;

internal procedure scancond(boolean b) # scanning for if-then-else constructs;
if b then
	begin scanlb # must find {;
	newsavelevel(trueend);
	end
else 	begin passblock # skip the true part;
	getnctok;
	if curcmd≠elsecode then backerror(escapechar&"else required here");
	scanlb; newsavelevel(falseend);
	end;
end